from IPython.display import HTML
# Build an HTML snippet that loads jQuery and defines `code_toggle`, which
# hides or shows the input area of every JupyterLab code cell
# (`div.jp-CodeCell > div.jp-Cell-inputWrapper`). The script calls
# `code_toggle` once when the document is ready (`code_show` starts true, so
# that first call hides the inputs) and again whenever the form is submitted.
HTML('''<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/2.0.3/jquery.min.js "></script><script>
code_show=true;
function code_toggle() {
if (code_show){
$('div.jp-CodeCell > div.jp-Cell-inputWrapper').hide();
} else {
$('div.jp-CodeCell > div.jp-Cell-inputWrapper').show();
}
code_show = !code_show
}
$( document ).ready(code_toggle);</script><form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>
''')
%%HTML
<script src="require.js"></script>
#!pip install pyquadkey2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
pio.renderers.default='notebook'
from pyquadkey2.quadkey import TileAnchor, QuadKey
from shapely.geometry import Point, Polygon, MultiPolygon
import geopandas as gpd
import folium
from shapely import wkb, wkt
from sklearn.preprocessing import MinMaxScaler
import itertools
from tqdm.notebook import tqdm
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql import SparkSession
# Create the Spark session used by every query below, or reuse one that is
# already running; the master is the local machine ('local[*]').
spark = SparkSession.builder.master('local[*]').getOrCreate()
class FigureLabeler:
    """Number and display figure and table captions in a notebook.

    The labeler keeps two independent running counters, ``fig_num`` and
    ``table_num``, both starting at 1. Each caption call displays an HTML
    paragraph with the current number and then advances that counter.
    """

    def __init__(self):
        self.fig_num = 1
        self.table_num = 1

    @staticmethod
    def _show_caption(kind, number, title, caption):
        """Display one caption paragraph, e.g. ``Figure 3. Title.``."""
        # The markup (including the line break after <b>) matches what the
        # notebook has always emitted; HTML collapses that whitespace.
        display(HTML(
            '<p style="font-size:12px;font-style:default;"><b>\n'
            f'{kind} {number}. {title}.</b><br>{caption}</p>'))

    def fig_caption(self, title, caption):
        """Display a numbered figure caption and advance the figure counter.

        Parameters
        ----------
        title : str
            Bold heading shown after ``Figure N.``.
        caption : str
            Descriptive text shown on the line below the heading.
        """
        self._show_caption('Figure', self.fig_num, title, caption)
        self.fig_num += 1

    def table_caption(self, title, caption):
        """Display a numbered table caption and advance the table counter.

        Parameters
        ----------
        title : str
            Bold heading shown after ``Table N.``.
        caption : str
            Descriptive text shown on the line below the heading.
        """
        self._show_caption('Table', self.table_num, title, caption)
        self.table_num += 1

    def reset_to(self, fig_num=None, table_num=None):
        """Manually set the next figure and/or table number.

        A counter whose argument is ``None`` is left unchanged.
        """
        if fig_num is not None:
            self.fig_num = fig_num
        if table_num is not None:
            self.table_num = table_num
def qk_to_bbox(q):
    """Return the Rectangle-patch anchor, width, and height of a quadkey tile.

    Parameters
    ----------
    q : str
        Quadkey identifying the tile.

    Returns
    -------
    tuple
        ``((x, y), width, height)`` where ``(x, y)`` is the lower-left
        (south-west) corner as ``(longitude, latitude)`` and ``width`` /
        ``height`` are the tile's extent in degrees of longitude / latitude.
        Adding ``width`` and ``height`` to the anchor therefore spans exactly
        the tile.
    """
    tile = QuadKey(q)
    # to_geo yields (latitude, longitude), so index 0 is latitude.
    nw = tile.to_geo(anchor=TileAnchor.ANCHOR_NW)
    se = tile.to_geo(anchor=TileAnchor.ANCHOR_SE)
    # Anchor at the south-west corner. Anchoring at the north-west corner and
    # then adding the height would place the box one tile too far north.
    west = min(nw[1], se[1])
    south = min(nw[0], se[0])
    return ((west, south), np.abs(se[1] - nw[1]), np.abs(se[0] - nw[0]))
def qk_to_tile(q):
    """Return the ``(x, y)`` tile coordinates that quadkey ``q`` refers to."""
    decoded = QuadKey(q).to_tile()
    # Only the first element of the decoded result is the tile coordinate.
    return decoded[0]
def partition_df(df, zoom, connection_type, year, quarter, scaling=False):
    """Summarize one (type, year, quarter) partition at a coarser zoom level.

    Rows of the Spark DataFrame ``df`` matching ``connection_type``, ``year``
    and ``quarter`` are grouped by the first ``zoom`` characters of their
    quadkey and aggregated. The result is returned as a GeoDataFrame indexed
    by that quadkey prefix (``level_{zoom}``), with a ``bbox`` column from
    ``qk_to_bbox`` and a ``tile`` polygon geometry in EPSG:4326.

    When ``scaling`` is exactly ``True``, a min-max scaled copy of every
    aggregate column is appended under a ``norm_`` prefix.
    """
    level_col = f'level_{zoom}'

    in_partition = ((df['type'] == connection_type) &
                    (df['year'] == year) &
                    (df['quarter'] == quarter))

    # `mean` and `sum` are the Spark column functions brought in by the
    # pyspark.sql.functions star import, not the Python builtins.
    aggregates = [
        mean('avg_d_kbps').alias('avg_download_kbps'),
        mean('avg_u_kbps').alias('avg_upload_kbps'),
        mean('avg_lat_ms').alias('avg_latency_ms'),
        mean('tests').alias('avg_tests'),
        sum('tests').alias('total_tests'),
        mean('devices').alias('avg_devices'),
        sum('devices').alias('total_devices'),
    ]

    summary = (df.filter(in_partition)
                 .withColumn(level_col, substring('quadkey', 1, zoom))
                 .groupby(level_col)
                 .agg(*aggregates)
                 .sort(level_col))

    p_df = summary.toPandas()
    p_df['bbox'] = p_df[level_col].apply(qk_to_bbox)
    p_df = p_df.set_index(level_col)

    if scaling is True:
        # Every column except the trailing `bbox` is a numeric aggregate.
        metrics = p_df.iloc[:, :-1]
        normalized = pd.DataFrame(
            MinMaxScaler().fit_transform(metrics),
            index=p_df.index,
            columns=['norm_' + name for name in metrics.columns])
        p_df = pd.concat([p_df, normalized], axis=1)

    def _bbox_to_polygon(bbox):
        """Build the rectangle spanned by an (anchor, width, height) bbox."""
        (x0, y0), width, height = bbox
        return Polygon([(x0, y0),
                        (x0 + width, y0),
                        (x0 + width, y0 + height),
                        (x0, y0 + height)])

    p_df['tile'] = p_df['bbox'].apply(_bbox_to_polygon)
    return gpd.GeoDataFrame(p_df, geometry='tile').set_crs(epsg=4326)
def compiled_partition_df(df, zoom, scaling=False):
    """Return the zoom-summarized GeoDataFrame for every partition combined.

    ``partition_df`` is run for each combination of connection type
    (fixed, mobile), year (2019-2022) and quarter (1-4), and the results are
    stacked into one frame with ``type``, ``year`` and ``quarter`` columns.

    Parameters
    ----------
    df : Spark DataFrame of speedtest observations.
    zoom : int
        Number of leading quadkey characters to group by.
    scaling : bool, default False
        Forwarded to ``partition_df``.

    Returns
    -------
    GeoDataFrame in EPSG:4326 whose index holds the ``(x, y)`` tile of each
    row's ``level_{zoom}`` quadkey and whose geometry column is ``tile``.
    """
    connection_type = ['fixed', 'mobile']
    year = [2019, 2020, 2021, 2022]
    quarter = [1, 2, 3, 4]

    # Materialize the combinations so tqdm knows the total and can show
    # real progress instead of an open-ended counter.
    combos = list(itertools.product(connection_type, year, quarter))

    # Collect the partitions and concatenate once; concatenating inside the
    # loop would re-copy the accumulated frame on every iteration.
    frames = []
    for t, y, q in tqdm(combos):
        new_df = partition_df(df, zoom, t, y, q, scaling=scaling)
        frames.append(new_df.assign(type=t, year=y, quarter=q))
    p_df = pd.concat(frames)

    # Replace the quadkey-prefix index with the corresponding (x, y) tile.
    p_df = p_df.set_index(p_df.reset_index()[f'level_{zoom}'].apply(qk_to_tile))

    # Rebuild the tile rectangles from bbox = (anchor, width, height) and
    # re-wrap as a GeoDataFrame so geometry and CRS are set on the result.
    p_df['tile'] = p_df['bbox'].apply(lambda x: Polygon([x[0],
                                                         (x[0][0]+x[1], x[0][1]),
                                                         (x[0][0]+x[1],
                                                          x[0][1]+x[2]),
                                                         (x[0][0], x[0][1]+x[2])
                                                         ]))
    p_df = gpd.GeoDataFrame(p_df, geometry='tile').set_crs(epsg=4326)
    return p_df
# Single caption helper shared by the cells below, so figure and table
# numbers keep counting across the notebook.
labeler = FigureLabeler()
Ookla Open Data is a platform provided by Ookla, the company behind Speedtest.net, which offers access to vast global internet performance data. It allows users to explore and analyze broadband and mobile network statistics worldwide. Ookla collects data from millions of devices running their Speedtest application and compiles it into comprehensive datasets. These datasets cover various metrics such as download speed, upload speed, latency, and signal strength across different geographical locations. Ookla Open Data promotes transparency and helps drive data-driven decision-making in internet connectivity. This data was utilized by governments, regulators, internet service providers, and researchers to understand network performance, identify areas for improvement, and make informed policy decisions.
Given this rich source of data, this study compared and analyzed the performance of fixed and mobile network types in various aspects, including kilobits per second (kbps), latency, number of users, and geographic location.
The study determined the areas where fixed networks excel over mobile networks and vice versa, providing insights into these network types' relative strengths and weaknesses. The analysis confirmed that fixed lines performed better than mobile on all aspects, except that the performance of fixed lines is more variable, and ultimately the preference between the two would come down to the importance of mobility, upload speed, and availability in the geographic region of the user.
By examining these performance factors, the study recommends that these conclusions be extended, for a better understanding of the comparative advantages and limitations of fixed and mobile networks, by examining them on a case-by-case basis that takes into account electricity availability, reliability, and other factors that affect connectivity in each geographic subregion.
In what aspects (kbps, latency, number of users, and geographic location) do fixed and mobile network types outperform each other?
In today's digital age, reliable and high-speed internet access has become essential for social, economic, and educational opportunities. However, significant disparities in internet accessibility and quality persist, hindering the progress of underserved communities. The availability of a comprehensive global speed test dataset presents a unique opportunity to address these challenges and bridge the digital divide. By processing and analyzing this dataset, we can unlock valuable insights that empower individuals, organizations, and policymakers to take targeted actions, improve internet infrastructure, and create a more inclusive digital future.
| Field Name | Type | Description |
|---|---|---|
| avg_d_kbps | Integer | The average download speed of all tests performed in the tile, represented in kilobits per second |
| avg_u_kbps | Integer | The average upload speed of all tests performed in the tile, represented in kilobits per second |
| avg_lat_ms | Integer | The average latency of all tests performed in the tile, represented in milliseconds |
| tests | Integer | The number of tests taken in the tile |
| devices | Integer | The number of unique devices contributing tests in the tile |
| quadkey | String | The quadkey representing the tile |
| tile | String | The geographic WKT format representation of the quadkey tile |
| type | String | Whether the speedtest observation is a fixed or mobile connection |
| year | Integer | The year the speedtest observation was conducted |
| quarter | Integer | The quarter the speedtest observation was conducted |
The Ookla platform generates a vast amount of Speedtest data every month from people who run the speed test on its website, Speedtest.net. This data is readily found on AWS's Registry of Open Data, with documentation provided here. The dataset is saved into parquet files partitioned by type (fixed or mobile connection), year, and quarter, covering the years 2019 to 2022. A list of features can be seen in Table 1.
The geographic information of each speedtest entry was stored through Well Known Text (WKT) geometry in tiles, following the quadkey system to manage spatial joins effectively. Under this system, the world is subdivided by quarters a certain number of times, or a zoom level (z). The resulting tiles are defined as the quartered fractions of the Earth's width/height according to the Web Mercator projection (EPSG:3857), and their dimensions can be estimated in meters, taking into account slight variations due to latitude.
In the raw dataset, a zoom level (z) of 16 was used for tiling, equivalent to an 18-arcsecond block. This means the projection of the world was subdivided by quarters 16 times. For each speedtest observation, a 16-digit long quadkey is provided, where each digit is an integer from 0 to 3 corresponding to which quarter that tile belongs to at that zoom level (see image below for an example of z = 1 to 3)

The Ookla Speedtest Dataset used in this report came from parquet files that were taken from and made available to the team via the JOJIE public dataset (/mnt/data/public/speedtest) from the Asian Institute of Management, although a copy of this may also be accessed in AWS's Registry of Open Data.
Since the shapefiles in this dataset are tiles (not the exact shape of the country), for geospatial analysis, subregions were taken from Natural Earth Shapefiles, specifically the 1:50m Cultural Vectors at Admin 0 - Countries, which can be accessed here.
Below is a sample of the raw dataset read as a Spark DataFrame. This data includes all fixed and mobile type connections, all years, and all quarters each speedtest was observed from 2019 to 2022. The WKT tile format is read in string format, but will be converted during preprocessing.
# Reading the files through Spark
df = spark.read.parquet('/mnt/data/public/speedtest/parquet')
# Registering df to catalogue so it can be queried as `performance` in Spark SQL
df.createOrReplaceTempView('performance')
# Pull 10 rows into pandas, then display the first 5 (pandas `head()` default)
display(df.limit(10).toPandas().head())
# Force the caption below to be numbered "Table 2"
labeler.reset_to(table_num=2)
labeler.table_caption('Raw Ookla Speedtest Performance Dataset',
'The tile is a Well-Known Text (WKT) shapefile in string format'
)
| quadkey | tile | avg_d_kbps | avg_u_kbps | avg_lat_ms | tests | devices | type | year | quarter | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0022133222312322 | POLYGON((-160.02685546875 70.6435894914449, -1... | 19110 | 7891 | 77 | 3 | 2 | mobile | 2022 | 1 |
| 1 | 0022133222330023 | POLYGON((-160.043334960938 70.6363054807905, -... | 21870 | 11875 | 83 | 2 | 1 | mobile | 2022 | 1 |
| 2 | 0022133222330032 | POLYGON((-160.037841796875 70.6363054807905, -... | 14157 | 14560 | 75 | 14 | 2 | mobile | 2022 | 1 |
| 3 | 0022133222330100 | POLYGON((-160.02685546875 70.6417687358462, -1... | 5468 | 9886 | 83 | 1 | 1 | mobile | 2022 | 1 |
| 4 | 0022133222330102 | POLYGON((-160.02685546875 70.6399478155463, -1... | 24311 | 16243 | 72 | 1 | 1 | mobile | 2022 | 1 |
Table 2. Raw Ookla Speedtest Performance Dataset.
The tile is a Well-Known Text (WKT) shapefile in string format
Below is the shapefile summary of the Natural Earth Cultural Vector Shapefiles. Each row represents a country's shape in an EPSG:4326 projection of the world.
# Load the Natural Earth country shapes from the local 'map' path and declare
# their coordinates as EPSG:4326.
countries = gpd.read_file('map').set_crs(epsg=4326)
display(countries.head())
# Caption typo fixed: "coverted" -> "converted".
labeler.table_caption('Raw Natural Earth Cultural Vector Shapefiles',
                      'The table was converted into Geopandas'
                      )
ERROR 1: PROJ: proj_create_from_database: Open of /opt/conda/share/proj failed
| featurecla | scalerank | LABELRANK | SOVEREIGNT | SOV_A3 | ADM0_DIF | LEVEL | TYPE | TLC | ADMIN | ... | FCLASS_TR | FCLASS_ID | FCLASS_PL | FCLASS_GR | FCLASS_IT | FCLASS_NL | FCLASS_SE | FCLASS_BD | FCLASS_UA | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Admin-0 country | 1 | 3 | Zimbabwe | ZWE | 0 | 2 | Sovereign country | 1 | Zimbabwe | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | POLYGON ((31.28789 -22.40205, 31.19727 -22.344... |
| 1 | Admin-0 country | 1 | 3 | Zambia | ZMB | 0 | 2 | Sovereign country | 1 | Zambia | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | POLYGON ((30.39609 -15.64307, 30.25068 -15.643... |
| 2 | Admin-0 country | 1 | 3 | Yemen | YEM | 0 | 2 | Sovereign country | 1 | Yemen | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | MULTIPOLYGON (((53.08564 16.64839, 52.58145 16... |
| 3 | Admin-0 country | 3 | 2 | Vietnam | VNM | 0 | 2 | Sovereign country | 1 | Vietnam | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | MULTIPOLYGON (((104.06396 10.39082, 104.08301 ... |
| 4 | Admin-0 country | 5 | 3 | Venezuela | VEN | 0 | 2 | Sovereign country | 1 | Venezuela | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | MULTIPOLYGON (((-60.82119 9.13838, -60.94141 9... |
5 rows × 169 columns
Table 3. Raw Natural Earth Cultural Vector Shapefiles.
The table was coverted into Geopandas
For this particular study, the most important columns are the geometry column containing the shape information, and the SUBREGION column containing what region each country belongs to. A full list of the subregions defined by the shapefiles is as follows:
# Draw two maps side by side: raw country shapes (left) and the same shapes
# merged by subregion (right).
fig, ax = plt.subplots(1, 2, figsize=(12,4), dpi=300)
# World by cultural boundaries
countries.plot(ax=ax[0])
ax[0].set_title('World by Cultural Boundaries')
ax[0].set_xticks([])
ax[0].set_yticks([])
# World by subregions
# dissolve() merges all country geometries sharing the same SUBREGION value
temp = countries.dissolve(by='SUBREGION')[['geometry']]
# Copy the index (the subregion name) into a column so it can drive the colours
temp['subregion'] = temp.index
temp.plot(ax=ax[1], column='subregion')
ax[1].set_title('World by Subregions')
ax[1].set_xticks([])
ax[1].set_yticks([])
plt.show()
# Only the table counter is reset here; the figure caption below uses the
# current figure counter.
labeler.reset_to(table_num=3)
labeler.fig_caption('Natural Earth Cultural World Map',
'Figure shows the world divided by boundaries and subregion,'
' according to Natural Earth Cultural boundaries.'
)
Figure 1. Natural Earth Cultural World Map.
Figure shows the world divided by boundaries and subregion, according to Natural Earth Cultural boundaries.
| Subregion | Number of Countries |
|---|---|
| Caribbean | 25 |
| Eastern Africa | 19 |
| Western Asia | 19 |
| Western Africa | 17 |
| Southern Europe | 16 |
| Northern Europe | 15 |
| South America | 13 |
| South-Eastern Asia | 11 |
| Southern Asia | 10 |
| Eastern Europe | 10 |
| Polynesia | 9 |
| Western Europe | 9 |
| Middle Africa | 9 |
| Eastern Asia | 8 |
| Central America | 8 |
| Micronesia | 7 |
| Northern Africa | 7 |
| Northern America | 5 |
| Seven seas (open ocean) | 5 |
| Melanesia | 5 |
| Southern Africa | 5 |
| Central Asia | 5 |
| Australia and New Zealand | 4 |
| Antarctica | 1 |
In this section, the team prepared, cleaned, and preprocessed the collected dataset to ensure its quality and suitability for further analysis. This involved creating two main tables for the ease of later analysis:
Using Spark SQL, the Summary Statistics Table was generated to provide statistics for analysis. A preview can be found below:
# Creating summary statistics dataframe: one row per (year, quarter, type)
# with means, population variances, and totals computed by Spark SQL over the
# `performance` view, then collected into pandas.
df_avg = (spark.sql(
"""
SELECT
year,
quarter,
type,
AVG(avg_d_kbps) AS average_download_kbps,
AVG(avg_u_kbps) AS average_upload_kbps,
AVG(avg_lat_ms) AS average_latency,
VAR_POP(avg_d_kbps) AS variance_download_kbps,
VAR_POP(avg_u_kbps) AS variance_upload_kbps,
VAR_POP(avg_lat_ms) AS variance_latency,
SUM(devices) AS total_devices,
AVG(devices) AS avg_devices,
SUM(tests) AS total_tests,
AVG(tests) AS avg_tests
FROM performance
GROUP BY year, quarter, type;
""")).toPandas().sort_values(by='year')
# Feature engineering additional fields
# Numeric time axis: year plus quarter/4, so Q1 2019 -> 2019.25 and Q4 2019 -> 2020.0
df_avg['continuous_quarter'] = df_avg['year'] + df_avg['quarter'] / 4
# Mean of the download and upload averages, converted from kbps to Mbps
# ((download + upload) / 2 / 1000)
df_avg['speed'] = (df_avg['average_download_kbps'] +
df_avg['average_upload_kbps']) / 2000
# Final ordering is chronological; this supersedes the sort by year above
df_avg.sort_values(by='continuous_quarter', inplace=True)
display(df_avg.head())
# Force the caption below to be numbered "Table 5"
labeler.reset_to(table_num=5)
labeler.table_caption('Processed Summary Statistics Table',
'The table shows the computed statistics using spark sql.'
' This is further enhanced by adding feature engineered fields'
)
| year | quarter | type | average_download_kbps | average_upload_kbps | average_latency | variance_download_kbps | variance_upload_kbps | variance_latency | total_devices | avg_devices | total_tests | avg_tests | continuous_quarter | speed | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 13 | 2019 | 1 | fixed | 50879.295999 | 21231.945033 | 41.564853 | 3.607452e+09 | 1.356275e+09 | 17734.652032 | 20877133 | 4.280701 | 69942821 | 14.341256 | 2019.25 | 36.055621 |
| 12 | 2019 | 1 | mobile | 25139.942852 | 9392.571670 | 51.504502 | 7.852244e+08 | 8.258373e+07 | 3192.574972 | 10618097 | 3.286070 | 21592933 | 6.682543 | 2019.25 | 17.266257 |
| 4 | 2019 | 2 | mobile | 25936.895617 | 9544.628041 | 50.556830 | 8.777544e+08 | 8.589149e+07 | 3240.006467 | 10495087 | 3.142064 | 21234141 | 6.357168 | 2019.50 | 17.740762 |
| 5 | 2019 | 2 | fixed | 52623.905091 | 22332.541222 | 41.797200 | 3.869277e+09 | 1.493373e+09 | 16689.769756 | 20615874 | 4.244102 | 68716230 | 14.146317 | 2019.50 | 37.478223 |
| 25 | 2019 | 3 | mobile | 28449.075827 | 10025.894767 | 48.731881 | 1.155145e+09 | 9.198146e+07 | 2895.101348 | 14067596 | 3.506218 | 28273464 | 7.046899 | 2019.75 | 19.237485 |
Table 5. Processed Summary Statistics Table.
The table shows the computed statistics using spark sql. This is further enhanced by adding feature engineered fields
The map data was pre-processed by creating a boolean column to check if the tile intersects a subregion. A preview of the geospatial table can be found below. Because some tiles can intersect multiple subregions, we will find that some observations will not be mutually exclusive during geospatial analysis.
# Build the combined zoom=7 GeoDataFrame covering every type, year, and
# quarter (2 types x 4 years x 4 quarters = 32 Spark scans).
df_geo = compiled_partition_df(df, 7)

# One boolean column per subregion: True where the tile touches that
# subregion's merged geometry. A tile may be True for several subregions.
subregion_shapes = countries.dissolve(by='SUBREGION')['geometry']
for region, geom in subregion_shapes.items():
    df_geo[region] = df_geo.intersects(geom)

# Derived fields: a "Q<quarter> <year>" label and the mean of download and
# upload speed expressed in Mbps.
quarter_label = 'Q' + df_geo['quarter'].astype(str)
df_geo['year_quarter'] = quarter_label + ' ' + df_geo['year'].astype(str)
combined_kbps = df_geo['avg_download_kbps'] + df_geo['avg_upload_kbps']
df_geo['speed'] = combined_kbps / 2000

display(df_geo.head())
labeler.table_caption(
    'Processed Geospatial Table',
    'The table preserves the tiles WKT shape data with an additional subregion column.'
    ' The index of the DataFrame refers to the x,y tile it belongs to in the quadkeys system'
    ' at zoom level 7 (16 digit quadkeys grouped according to its first 7 digits)'
)
0it [00:00, ?it/s]
| avg_download_kbps | avg_upload_kbps | avg_latency_ms | avg_tests | total_tests | avg_devices | total_devices | bbox | tile | type | ... | South America | South-Eastern Asia | Southern Africa | Southern Asia | Southern Europe | Western Africa | Western Asia | Western Europe | year_quarter | speed | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| level_7 | |||||||||||||||||||||
| (7, 27) | 8421.500000 | 3552.500000 | 43.250000 | 1.250000 | 5 | 1.250000 | 5 | ((-160.3125, 71.524909037328), 2.8125, 0.91229... | POLYGON ((-160.31250 71.52491, -157.50000 71.5... | fixed | ... | False | False | False | False | False | False | False | False | Q1 2019 | 5.987000 |
| (4, 30) | 3444.333333 | 3164.333333 | 46.666667 | 2.000000 | 6 | 1.333333 | 4 | ((-168.75, 68.656554984757), 2.8125, 1.0473343... | POLYGON ((-168.75000 68.65655, -165.93750 68.6... | fixed | ... | False | False | False | False | False | False | False | False | Q1 2019 | 3.304333 |
| (6, 30) | 772.000000 | 2797.000000 | 40.000000 | 4.000000 | 4 | 4.000000 | 4 | ((-163.125, 68.656554984757), 2.8125, 1.047334... | POLYGON ((-163.12500 68.65655, -160.31250 68.6... | fixed | ... | False | False | False | False | False | False | False | False | Q1 2019 | 1.784500 |
| (6, 31) | 15211.750000 | 20763.250000 | 34.833333 | 2.750000 | 33 | 1.333333 | 16 | ((-163.125, 67.609220604964), 2.8125, 1.095960... | POLYGON ((-163.12500 67.60922, -160.31250 67.6... | fixed | ... | False | False | False | False | False | False | False | False | Q1 2019 | 17.987500 |
| (8, 27) | 6676.400000 | 5286.666667 | 41.533333 | 2.266667 | 34 | 1.400000 | 21 | ((-157.5, 71.524909037328), 2.8125, 0.91229479... | POLYGON ((-157.50000 71.52491, -154.68750 71.5... | fixed | ... | False | False | False | False | False | False | False | False | Q1 2019 | 5.981533 |
5 rows × 38 columns
Table 6. Processed Geospatial Table.
The table preserves the tiles WKT shape data with an additional subregion column. The index of the DataFrame refers to the x,y tile it belongs to in the quadkeys system at zoom level 7 (16 digit quadkeys grouped according to its first 7 digits)
The data is now ready for exploration. The following figures in this section provide the most interesting comparative visualizations between fixed and mobile data performance through Comparative Summary Statistics, Time Series Visualizations, and Geospatial Visualizations.
In this section, summary statistics pertaining to general fixed and mobile connection performance without respect to time or space are focused on.
# Convert kbps to Mbps
df_geo['avg_download_mbps'] = df_geo['avg_download_kbps'] / 1000
df_geo['avg_upload_mbps'] = df_geo['avg_upload_kbps'] / 1000
# Plot download speed vs upload speed with linear trendline
# One point per tile, coloured by connection type, animated over year_quarter;
# the axis ranges are fixed so frames stay comparable.
fig = px.scatter(df_geo,
x='avg_download_mbps',
y='avg_upload_mbps',
color='type',
animation_frame='year_quarter',
range_x=(-1, 500),
range_y=(-1, 300),
trendline='ols'
)
# Demarcate Mbps line (vertical marker at 100 Mbps download)
fig.add_vline(x=100,
line_width=1,
line_dash='dot',
line_color='gray',
annotation_text="100 Mbps",
annotation_position="top right")
# NOTE(review): the title mentions only download speed although the chart
# plots download (x) against upload (y) -- confirm the intended wording.
fig.update_layout(yaxis_title='Average Upload Speed (Mbps)',
xaxis_title='Average Download Speed (Mbps)',
title='Average Download Speed in Mbps through Time',
template='plotly_white')
fig.show()
labeler.fig_caption('Average Download Speed in Mbps through Time',
''
)
Figure 2. Average Download Speed in Mbps through Time.
Despite significant advancements in mobile communications, particularly with the introduction of 5G technology, fixed lines still tend to outperform mobile networks in general due to the limitations on the transfer speed and the number of concurrent users. However, it is worth noting that mobile networks have made significant progress and can now match the download speeds offered by fixed lines. Additionally, the performance of mobile networks is generally more consistent and reliable than their fixed-line counterparts, which can be prone to fluctuations in performance.
Furthermore, an interesting observation from this comparison is the availability of Burst Speed, which refers to the maximum data transfer rate that a device or connection can achieve. This Burst Speed depends on seasonality, as indicated by a pulsating graph. When considering a broadband or prepaid plan, it is advisable to focus on the sustained speed rather than the maximum speed, as there may be a significant difference between the two. While a connection may boast a high maximum speed, the actual sustained speed experienced regularly might be lower. While fixed lines generally offer better performance than mobile networks due to their inherent limitations, mobile networks have made significant progress and can now match download speeds. Mobile networks also provide more consistent performance, and when selecting a plan, it is crucial to consider the sustained speed rather than just the maximum speed to ensure a satisfactory user experience.
The visualizations on this section focus on changes in fixed and mobile performance over time.
# Line chart of the total number of devices per quarter, one line per
# connection type.
fig = go.Figure()
for trace_name, rows in (('Fixed', df_avg[df_avg['type'] == 'fixed']),
                         ('Mobile', df_avg[df_avg['type'] == 'mobile'])):
    fig.add_trace(go.Scatter(x=rows['continuous_quarter'],
                             y=rows['total_devices'],
                             mode='lines+markers+text',
                             name=trace_name))

# Label each numeric tick position as "Q<quarter> <year>"
ticktext = [f'Q{q} {y}' for q, y in zip(df_avg['quarter'], df_avg['year'])]
fig.update_layout(xaxis={'tickmode': 'array',
                         'tickvals': df_avg['continuous_quarter'],
                         'ticktext': ticktext})

# Axis labels, theme, and title
fig.update_layout(xaxis_title='Quarter and Year',
                  yaxis_title='Number of Devices',
                  template='plotly_white',
                  title='Number of Devices surveyed by Type')
fig.show()
labeler.fig_caption('Number of Devices surveyed by Type', '')
Figure 3. Number of Devices surveyed by Type.
As displayed in this graph, more consumers have become aware of the need to measure their internet speed. The number of devices peaked during the pandemic period (2020) and started to dip in Q4 2020. This trend was reflected only on fixed lines, not on mobile platforms.
# Line chart of the total number of tests per quarter, one line per
# connection type.
fig = go.Figure()
for trace_name, rows in (('Fixed', df_avg[df_avg['type'] == 'fixed']),
                         ('Mobile', df_avg[df_avg['type'] == 'mobile'])):
    fig.add_trace(go.Scatter(x=rows['continuous_quarter'],
                             y=rows['total_tests'],
                             mode='lines+markers+text',
                             name=trace_name))

# Label each numeric tick position as "Q<quarter> <year>"
ticktext = [f'Q{q} {y}' for q, y in zip(df_avg['quarter'], df_avg['year'])]
fig.update_layout(xaxis={'tickmode': 'array',
                         'tickvals': df_avg['continuous_quarter'],
                         'ticktext': ticktext})

# Axis labels, theme, and title
fig.update_layout(xaxis_title='Quarter and Year',
                  yaxis_title='Frequency of Tests',
                  template='plotly_white',
                  title='Number of tests performed')
fig.show()
labeler.fig_caption('Number of tests performed', '')
Figure 4. Number of tests performed.
Similar to the previous statistics, this reflects people's need for a reliable internet connection, particularly during the pandemic, when most work and school activities were done online. This need was reflected only on fixed lines, not on mobile platforms.
# Stacked comparison of average speed (top) and average latency (bottom) per
# quarter for fixed and mobile connections. fig1 and fig2 are built first and
# their traces are then copied into a two-row subplot figure.
fig1 = go.Figure()
fig2 = go.Figure()

# Split the summary table by connection type once; every trace reuses these
fixed_avg = df_avg[df_avg['type'] == 'fixed']
mobile_avg = df_avg[df_avg['type'] == 'mobile']

# Label each numeric tick position as "Q<quarter> <year>"
ticktext = ['Q{} {}'.format(q, y) for q, y in zip(df_avg['quarter'],
                                                   df_avg['year'])]

# FIG 1: average speed (mean of download and upload, in Mbps)
fig1.add_trace(go.Scatter(x=fixed_avg['continuous_quarter'],
                          y=fixed_avg['speed'],
                          mode='lines+markers+text', name='Fixed',
                          line=dict(color='blue'),
                          marker=dict(color='blue')))
fig1.add_trace(go.Scatter(x=mobile_avg['continuous_quarter'],
                          y=mobile_avg['speed'],
                          mode='lines+markers+text', name='Mobile',
                          line=dict(color='red'), marker=dict(color='red')))
fig1.update_layout(xaxis=dict(tickmode='array',
                              tickvals=df_avg['continuous_quarter'],
                              ticktext=ticktext))
# This figure plots speed, so label it as speed (it was labelled as latency)
fig1.update_layout(xaxis_title='Quarter and Year',
                   yaxis_title='Speed (Mbps)',
                   title='Average Speed by Type')

# FIG 2: average latency; legend entries are suppressed because FIG 1's
# traces already provide the Fixed/Mobile legend in the combined figure
fig2.add_trace(go.Scatter(x=fixed_avg['continuous_quarter'],
                          y=fixed_avg['average_latency'],
                          mode='lines+markers+text', name='Fixed',
                          line=dict(color='blue'), marker=dict(color='blue'),
                          showlegend=False))
fig2.add_trace(go.Scatter(x=mobile_avg['continuous_quarter'],
                          y=mobile_avg['average_latency'],
                          mode='lines+markers+text', name='Mobile',
                          line=dict(color='red'), marker=dict(color='red'),
                          showlegend=False))
fig2.update_layout(xaxis=dict(tickmode='array',
                              tickvals=df_avg['continuous_quarter'],
                              ticktext=ticktext))
fig2.update_layout(xaxis_title='Quarter and Year',
                   yaxis_title='Latency (ms)',
                   title='Average Latency by Type',
                   template='plotly_white')

# Combine: speed traces in row 1, latency traces in row 2
fig = make_subplots(rows=2, cols=1)
fig.add_trace(fig1.data[0], row=1, col=1)
fig.add_trace(fig1.data[1], row=1, col=1)
fig.add_trace(fig2.data[0], row=2, col=1)
fig.add_trace(fig2.data[1], row=2, col=1)
fig.update_yaxes(title_text="Average Speed (Mbps) Higher is better", row=1, col=1)
fig.update_yaxes(title_text="Average Latency (ms) Lower is better", row=2, col=1)
# The bottom x-axis is hidden; the quarter labels are applied to the first
# x-axis through `xaxis=` below
fig.update_xaxes(visible=False, row=2, col=1)
fig.update_layout(height=700,
                  title_text="Average Speed and Latency Comparison",
                  template='plotly_white',
                  xaxis=dict(tickmode='array',
                             tickvals=df_avg['continuous_quarter'],
                             ticktext=ticktext))
fig.show()
# Force the caption below to be numbered "Figure 6"
labeler.reset_to(fig_num=6)
labeler.fig_caption('Average Speed and Latency Comparison', ' ')
Figure 6. Average Speed and Latency Comparison.
This graph shows that internet services are improving in general. Average speed is increasing over time and average latency is decreasing over time for both connection types. Fixed connections also consistently outperform mobile connections.
# Average the per-quarter latency mean and latency variance for each
# connection type and show the result as a centred HTML table.
display(HTML(f"<center>{df_avg.groupby('type')[['average_latency', 'variance_latency']].mean().to_html()}</center>"))
# Force the caption below to be numbered "Table 7"
labeler.reset_to(table_num=7)
labeler.table_caption('Average Mean and Variance Latency Comparison', ' ')
| average_latency | variance_latency | |
|---|---|---|
| type | ||
| fixed | 31.128697 | 8056.123697 |
| mobile | 43.199709 | 2439.105923 |
Table 7. Average Mean and Variance Latency Comparison.
However, despite being faster with lower latency, fixed lines experience much more variability in terms of performance, as shown in table 7.
# Create histogram of average latency per zoom=7 tile, overlaying the fixed
# and mobile distributions, with a rug plot in the margin
fig = px.histogram(df_geo, x="avg_latency_ms",
                   color='type',
                   barmode='overlay',
                   marginal='rug',
                   height=700,
                   range_y=(0, 350)
                   )
# Demarcate 1 second line
fig.add_vline(x=1000,
              line_width=1,
              line_dash='dot',
              line_color='gray',
              annotation_text="1 second latency",
              annotation_position="top right")
# Demarcate median latency line. The label is derived from the same computed
# value as the line position, so the two cannot drift apart if the data changes.
median_latency_ms = df_geo['avg_latency_ms'].quantile(0.5)
fig.add_vline(x=median_latency_ms,
              line_width=1,
              line_dash='dot',
              line_color='white',
              annotation_text=f"median latency\n({median_latency_ms:.0f}ms)",
              annotation_position="top right")
fig.update_layout(xaxis_title='Average Latency (ms)',
                  yaxis_title='Count',
                  title='Histogram of Average Latency (in milliseconds)',
                  template='plotly_white')
fig.show()
# Force the caption below to be numbered "Figure 7"
labeler.reset_to(fig_num=7)
labeler.fig_caption('Histogram of Average Latency (in milliseconds)',
                    '')
Figure 7. Histogram of Average Latency (in milliseconds).
Visualizing the distribution of fixed and mobile connections, we can see that the median latency of all speedtests is 46 milliseconds, far less than the one-second mark. Despite this, there is a bump in latency between 500ms and 1000ms, or 0.5 to 1 second. This bump is present in both fixed and mobile connections.
Unique to fixed connections, however, is the spread of extreme latency cases. Mobile connections will max out at the 2000ms or 2 second mark, but a few fixed connections will exceed this mark all the way to 4000ms or 4 seconds.
The visualizations in this section focus on differences in fixed and mobile performance across geographic regions.
# Transform geospatial dataset to calculate number of devices per region.
# For each subregion, sum total_devices by connection type over the tiles
# whose boolean subregion column is True.
subregion_df = pd.DataFrame()
for region in countries['SUBREGION'].unique():
    subregion_df[region] = df_geo.groupby([region, 'type'])[
        'total_devices'].sum(numeric_only=True)[(True)]
# Reshape to long form: one row per (SUBREGION, type)
subregion_df = (pd.melt(subregion_df.T.reset_index(),
                        id_vars='index',
                        value_vars=['fixed', 'mobile'],
                        var_name='type', value_name='total_devices')
                .rename(columns={'index': 'SUBREGION'}))
# all_devices = fixed + mobile per subregion, used only for ordering the bars
subregion_df = (subregion_df.assign(all_devices=subregion_df.
                                    groupby('SUBREGION')
                                    .transform('sum', numeric_only=True))
                .sort_values('all_devices', ascending=True))
# Plot barplot of total devices per region (some tiles overlap many regions).
# The last 20 rows of the ascending sort are the rows with the largest totals.
fig = px.bar(subregion_df.iloc[-20:],
             x='total_devices',
             y='SUBREGION',
             color='type',
             text_auto='.2s',
             orientation='h',
             barmode='group',
             height=700
             )
# Horizontal bars: device counts run along x and subregions along y, so the
# axis titles must follow that orientation (they were previously swapped).
fig.update_layout(xaxis_title='Count of Total Devices',
                  yaxis_title='Subregion',
                  title='Total Number of Speedtested Devices per Subregion (2019-2022)',
                  template='plotly_white')
fig.show()
# Force the caption below to be numbered "Figure 8"
labeler.reset_to(fig_num=8)
labeler.fig_caption('Total Number of Speedtested Devices per Subregion (2019-2022)',
                    '')
Figure 8. Total Number of Speedtested Devices per Subregion (2019-2022).
When dividing by subregion, we can see that Northern America's fixed connection devices surpass those of any other region by far, measuring 120 million total devices from 2019 to 2022. Northern America contributes the most devices in general to Speedtest.com. Mobile connection devices lag behind considerably. Interesting to note, however, is that the Asia subregion runners-up (South-Eastern, Southern, and Eastern Asia) have a smaller gap between total fixed and mobile devices.
# Share of fixed-connection devices per tile for Q4 2022.
percent_df_f = partition_df(df, 7, 'fixed', 2022, 4)
percent_df_m = partition_df(df, 7, 'mobile', 2022, 4)

# Outer-join the two device counts on the tile so that tiles seen by only
# one connection type are kept (their missing side becomes NaN).
device_cols = ['total_devices', 'tile']
percent_df = percent_df_f[device_cols].merge(
    percent_df_m[device_cols],
    how='outer',
    on='tile',
    suffixes=['_fixed', '_mobile'],
)

# Row-wise sum skips NaN by default, so a one-sided tile still gets a total.
per_type_cols = ['total_devices_fixed', 'total_devices_mobile']
tile_total = percent_df[per_type_cols].sum(axis=1)
percent_df['% fixed'] = (percent_df['total_devices_fixed'] / tile_total) * 100
percent_df = percent_df.drop(columns=per_type_cols)

# A NaN percentage is stored as 0 before the cast to whole percents
# (astype(int) truncates toward zero).
percent_df = percent_df.fillna(0)
percent_df['% fixed'] = percent_df['% fixed'].astype(int)

# Get shape intersection of country shapes and tile shapes
res_union = percent_df.overlay(countries, how='intersection')

# Plot choropleth of % fixed users
fig = px.choropleth(
    res_union,
    geojson=res_union.geometry,
    locations=res_union.index,
    color='% fixed',
    color_continuous_scale='RdYlBu',
    hover_data=['SUBREGION', 'SOVEREIGNT'],
    height=500,
)
fig.update_geos(fitbounds="locations", visible=True)
# Centered title, tight margins, and a readable colorbar label in one call.
fig.update_layout(
    title_text='Percent of Fixed Connection Devices over Total Devices (Q4 2022)',
    title_x=0.5,
    margin={"r": 0, "t": 30, "l": 0, "b": 10},
    coloraxis_colorbar={'title': '% Fixed'},
)
# Hide tile outlines.
fig.update_traces(marker_line_width=0)
fig.show()

labeler.fig_caption(
    'Percent of Fixed Connection Devices over Total Devices (Q4 2022)',
    ('A greater percentage of fixed connection (blue tiles) '
     'means fixed devices dominate mobile. A tile with less '
     'fixed connections (red tiles) means mobile dominates fixed.'),
)
Figure 9. Percent of Fixed Connection Devices over Total Devices (Q4 2022).
A greater percentage of fixed connection (blue tiles) means fixed devices dominate mobile. A tile with less fixed connections (red tiles) means mobile dominates fixed.
When dividing by tiles, we can better see the preference of the entire American continent (both North and South) for fixed connection devices over mobile ones, with little to no mobile connection devices in the Northern-most parts of Northern America, found in Canada.
As for the other regions, mobile devices are more preferred in Western, Northern, and Eastern Africa. The exception to this is the tip of South Africa. This is consistent with Africa's Mobile Market, which expects half the population in Sub-Saharan Africa will subscribe to mobile services by 2025 (GSMA, 2019).
Eastern, Southern, and South-Eastern Asia overall have a more even preference for speedtesting mobile and fixed devices, as seen in India. There are exceptions to this, however, such as China, which has a stronger preference for fixed connections, and Iran, which has a stronger preference for mobile connections.
This percentage may only suggest which devices are more prevalently used for personal entertainment and online services in these countries, but it is not a strong indicator of whether these devices are actually physically more prevalent in these countries, as these are only the devices whose users chose to take Speedtest.com's test.
# Average download speed per tile (fixed and mobile combined) for Q4 2022.
speed_df_f = partition_df(df, 7, 'fixed', 2022, 4)
speed_df_m = partition_df(df, 7, 'mobile', 2022, 4)

# Outer-join on the tile; the overlapping speed column picks up pandas'
# default '_x' (fixed) and '_y' (mobile) suffixes.
speed_cols = ['avg_download_kbps', 'tile']
speed_df = speed_df_f[speed_cols].merge(
    speed_df_m[speed_cols],
    how='outer',
    on='tile',
)

# Unweighted mean of the two per-type averages; NaN is skipped by default,
# so a tile with a single connection type keeps that type's value.
suffixed_cols = ['avg_download_kbps_x', 'avg_download_kbps_y']
speed_df['avg_download_kbps'] = speed_df[suffixed_cols].mean(axis=1)
speed_df = speed_df.drop(columns=suffixed_cols)

# Get shape intersection of country shapes and tile shapes
res_union = speed_df.overlay(countries, how='intersection')

# Plot choropleth of average speed
fig = px.choropleth(
    res_union,
    geojson=res_union.geometry,
    locations=res_union.index,
    color='avg_download_kbps',
    color_continuous_scale='viridis',
    hover_data=['SUBREGION', 'SOVEREIGNT'],
    height=500,
)
fig.update_geos(fitbounds="locations", visible=True)
# Centered title, tight margins, and a unit label on the colorbar in one call.
fig.update_layout(
    title_text='Average Download Speed (Fixed and Mobile) (Q4 2022)',
    title_x=0.5,
    margin={"r": 0, "t": 30, "l": 0, "b": 10},
    coloraxis_colorbar={'title': 'kbps'},
)
# Hide tile outlines.
fig.update_traces(marker_line_width=0)
fig.show()

labeler.fig_caption(
    'Average Download Speed (Fixed and Mobile) (Q4 2022)',
    '',
)
Figure 10. Average Download Speed (Fixed and Mobile) (Q4 2022).
Geographic location does make a difference to download speed. As of the latest data, Q4 2022, we can see that China is alight with the fastest download speed (both fixed and mobile) in the world. North-Eastern Asia, Northern America, parts of Australia, and Western Europe follow suit. The color patterns that emerged in Figure 9 do not match with Figure 10. Having a dominantly fixed connection location does not necessarily dictate that it will also have faster download speeds.
After going through the data, we generalize our findings to the following insights:
Connection speed is improving overall. Over time, both fixed and mobile connections have been improving. From 2019 to 2022, latency has been decreasing and speed increasing. While mobile lags behind fixed in all aspects, it continues to share these improvement trends with fixed connections as connection infrastructures continue to scale and improve. This improvement was also unaffected by the uptake of device users in the 2020s due to COVID-19 lockdown conditions.
Mobile connections are more consistent. Despite falling behind, mobile connection speedtests had less variance in their latency at up to around 2 seconds, compared to fixed connections, which span up to 4 seconds in some extreme cases. Mobile connections also follow a more linear trend in their download-to-upload speed, where every increase in download speed leads to only a small, but more consistent rise in upload speed. In the case of fixed connections, cases of fast upload but slow download speeds are as common as the opposite, with a less apparent trendline.
Connection speed is geographically disparate. Seen in Figure 10 above, most of the world is shrouded in slower download speeds regardless of the connection type that dominates that location. For example, both Northern and Southern America subregions have high preference towards fixed connections (Figure 9), but Southern America has overall slower speeds than Northern America (Figure 10). Faster speeds are dependent on the quality of the connection infrastructure available to that location, regardless of whether it is fixed or mobile.
We began this comparative review by asking: in what aspects (kbps, latency, number of users, and geographic location) do fixed and mobile network types outperform each other?
After exploring Ookla's speedtest data on fixed and mobile connections, we can now conclude the following:
| Criterion | Winner |
|---|---|
| Faster Download Speed (kbps) | Fixed |
| Faster Upload Speed (kbps) | Fixed |
| Lower Latency (ms) | Fixed |
| Greater Number of Users (total devices) | Fixed |
| Geographic Location | Fixed/Mobile |
Does this mean that fixed connections are superior? Not necessarily. We discovered that fixed connections also have a larger variance in latency and download-to-upload speeds. The benefit of a fixed connection is supposedly lower latency; however, there are instances where latency extends to as long as 4 seconds, which is not observed in mobile connections. While upload speeds in fixed connections are superior, for download speeds, mobile connections stand on par.
The difference in preference lies in your need for mobility (as fixed is a stationary connection), upload speed (which is generally better in fixed connections), and the availability of either service in your geographic location. While most of the Americas, Western Europe, and China, among others prefer to speedtest fixed devices, countries found in Africa prefer mobile, and countries found in Southern and Eastern Asia like India evenly prefer the two. The quality of either type may vary depending on what is most popular in your area.
Despite this, overall, we can see that regardless of connection type, connections in general are improving. Both fixed and mobile connections are following a trend of decreasing average latency and increasing average download and upload speed over time (from 2019 to 2022), regardless of the uptake in devices in 2020 due to COVID-19 lockdowns. The internet looks to only get faster from here.
By examining trends and factors such as download speeds, upload speeds, latency, and signal strength across different geographical locations and periods, researchers can uncover patterns and identify areas for improvement. This analysis can be particularly informative for users who must make informed decisions when choosing between mobile or fixed-line connections.
However, it is essential to acknowledge certain limitations of the dataset. Firstly, the data is acquired through voluntary testing, which may only partially represent the population. Users who run the speedtest application often do so with a specific purpose, which can introduce a self-selection bias that impacts the study's representativeness.
Additionally, the dataset is gathered only when there is a working line internet connection, or when mobile users are within the coverage of a 4G or 5G tower. Therefore, this study does not consider factors such as internet disconnections, power outages, and hardware specifications.
Despite being informative to consumers, due to these limitations, it is recommended to conduct proper data gathering to support evidence-based decision-making for policy formulation, network planning, and comparative analysis.